uni_cox_res2 %>% filter(index == TRUE) %>% select(-index) %>%
DT::datatable(filter='top', editable = 'cell',extensions = 'Buttons',
options = list(dom = 'Blfrtip',
scrollX = TRUE,
scrollY = TRUE,
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
lengthMenu = list(c(5,25,50,100,-1),
c(5,25,50,100,"All"))))
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
uni_gene_res <- uni_cox_res2 %>% filter(index==TRUE) %>%
filter(BH<=0.05) %>% arrange(BH)
cox_sig_dat <- cox_dat[c(1:100),colnames(cox_dat) %in% c("time","status","age_at_diagnosis","cancer_stage",uni_gene_res$var_name)]
t1 = Sys.time()
boot_lst <- boot_f1(
df = cox_sig_dat,
nboot = 100,
boot_ft = 20,
seed = 20231106
)
m <- c("lasso")
t5 = Sys.time()
model_out_all <- lapply(1:length(m), function(x) {
lapply(1:length(boot_lst), function(y) {
HDSI_model_f(boot_df = boot_lst[[y]], method = m[[x]])
})
})
## Warning: from glmnet C++ code (error code -30001); Numerical error at 1th
## lambda value; solutions for larger values of lambda returned
## Warning in getcoef(fit, nvars, nx, vnames): an empty model has been returned;
## probably a convergence issue
t6 = Sys.time()
opt_qtl = 0.05
Rf = 1.70
Sys.time()
## [1] "2024-01-18 20:30:41 EST"
selected_dat <- lapply(1:length(m), function(x) {
## bind B boot sample results into a dataframe
perf_tmp <- model_out_all[[x]] %>% bind_rows()
fe_star <- perf_tmp %>% group_by(model_cindex) %>% summarise(n=n())
### compute min_cindex
min_cindex <- perf_tmp %>%
group_by(varname) %>%
summarise(min_cindex = min(model_cindex)) %>%
mutate(
miu_min_cindex = mean(min_cindex),
# sigma_min_cindex = sqrt(mean((min_cindex - miu_min_cindex)^2) / (fe_star$n %>% unique() - 1)),
sd_min_cindex = sd(min_cindex),
Rf = Rf, ## hyperparameter
include_yn = min_cindex > (miu_min_cindex + Rf * sd_min_cindex)
)
### compute coef estimate and CI
beta_quantile <- perf_tmp %>%
group_by(varname) %>%
mutate(quantile = opt_qtl) %>% ## quantile is a hyperparameter
summarise(
qtl = opt_qtl,
beta_hat = mean(X1),
qtl_lower = quantile(X1, probs = qtl/2),
qtl_upper = quantile(X1, probs = 1 - (qtl /2) ),
include0_yn = !(((qtl_lower <= 0) & (0 <= qtl_upper)) | ((qtl_upper <= 0) & (0 <= qtl_lower)))
)
### join beta and cindex in a dataframe; filter selected features
perf <- full_join(beta_quantile, min_cindex, by = "varname") %>%
filter(include0_yn == TRUE & include_yn == TRUE)
### adding back main effect if only interaction terms are selected
included_inter <- perf$varname[str_detect(perf$varname, ":")] ## detect interaction terms
## detect main effect terms from interaction
included_inter1 <- stringr::str_split(included_inter, ":") %>% unlist()
## final selected features
included_fe <- c(perf$varname, included_inter1) %>% unique()
res <- list(perf,included_fe)
res
})
## [[1]]代表lasso方法;
selected_vars_dat <- data.frame(var_name =selected_dat[[1]][[2]]) %>%
mutate(new_var = str_replace_all(var_name,":","\\*"),
index = grepl("age",new_var)) %>%
filter(index==FALSE)
select_gene <- selected_vars_dat$new_var
length(select_gene)
## [1] 840
selected_genes <- paste(select_gene, collapse = "+")
gene_string <- capture.output(cat(selected_genes)) %>% paste(collapse = "\n")
cov_var_paste <- paste(gene_string,
'age_at_diagnosis+cancer_stage',sep = "+")
formula1 <- paste("surv_object ~",cov_var_paste
)
formula2 <- paste("surv_object ~",gene_string)
surv_object <- Surv(time = cox_dat$time,
event = cox_dat$status)
formula <- as.formula(formula2)
cox_model <- coxph(formula, data = cox_dat)
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Ran out of iterations and did not converge
result_summary <- summary(cox_model)
tmp<- result_summary$coefficients %>% data.frame() %>% bind_rows()
c_index <- concordance(cox_model)
c_index
## Call:
## concordance.coxph(object = cox_model)
##
## n= 958
## Concordance= 0.9492 se= 0.006376
## concordant discordant tied.x tied.y tied.xy
## 185494 9933 0 30 0